######################################################################
#Replication file for "The Impact of China's AIIB on the World Bank"
#Authors: Jing Qian, James Vreeland, Jianzhi Zhao
#Codes to replicate Figure CF.1 and Table C.2
######################################################################
rm(list = ls())

#To install the bpCausal package (ver 0.0.1)
# install.packages('devtools', repos = 'http://cran.us.r-project.org') # if not already installed
# devtools::install_github('liulch/bpCausal')

#Load bpCausal package
library(bpCausal)

#Other packages used
library(tidyverse)
library(RColorBrewer)

#Setwd
setwd("C:/Users/qianj/Dropbox (Princeton)/AIIB_WB_Replication")

#Load custom functions
#Note: Given the number of tests performed in the appendix, 
#several custom functions are created to implement estimation methods in a more efficient way.
#These functions merely create a wrap-up of existing functions, like bpCausal and gsynth,
#in order to avoid typing parameters repeatedly.

source("code/custom_functions.R")

###################################
#Replicate Table C.2
###################################
#--------------
#(1) Load data
#--------------
load("data/data_main.RData")

#--------------
#(2) Setup
#--------------
D.use = "aiib_founder_2016"
dv.list = c("soft_project_count_all")
covs.all = c("gdppc_log_lag",
             "population_log_lag",
             "election_either_lag",
             "fdi_gdp_lag",
             "debt_gni_lag",
             "oda_gni_lag",
             "polity2_lag",
             "unsc_lag",
             "IdealPointDistance_lag")
index.use = c("iso2c", "year")
df.use = df.main

#--------------
#(3) Estimation: Table C.2, column (1)
#~1.4 minutes
#--------------
#No covariate
X.use = Z.use = A.use = c()

#Estimate
tab.c2.1 = bpcausal.group(dv.list = dv.list,
                          df.use = df.use,
                          D.use = D.use,
                          X.use = X.use,
                          Z.use = Z.use,
                          A.use = A.use)


#Get estimated coefficients and intervals
att.tab.c2.1 = effSummary(tab.c2.1$soft_project_count_all)
att.tab.c2.1$est.avg #Estimated ATT as reported in Table C.2, Column (1)
att.tab.c2.1$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table C.2

#Number of observations
df.use.complete = df.main[, c(dv.list, D.use, index.use)] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#Treated and Control Units
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder_2016 == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units

#--------------
#(4) Estimation: Table C.2, column (2)
#~3 minutes
#--------------
#With covariates
X.use = Z.use = A.use = covs.all

#Estimate
tab.c2.2 = bpcausal.group(dv.list = dv.list,
                          df.use = df.use,
                          D.use = D.use,
                          X.use = X.use,
                          Z.use = Z.use,
                          A.use = A.use)


#Get estimated coefficients and intervals
att.tab.c2.2 = effSummary(tab.c2.2$soft_project_count_all)
att.tab.c2.2$est.avg #Estimated ATT as reported in Table C.2, Column (2)
att.tab.c2.2$est.avg %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table C.2

coef.tab.c2.2 = coefSummary(tab.c2.2$soft_project_count_all)
coef.tab.c2.2$est.beta[-1,] #Estimated invariant component of covariate coefficients (ignore the intercept in first row)
coef.tab.c2.2$est.beta[-1,] %>%
  round(digits = 3) %>%
  format(nsmall = 3) #Rounding, as reported in Table C.2

#Number of observations
df.use.complete = df.main[, c(dv.list, D.use, covs.all, index.use)] %>%
  .[complete.cases(.), ]
nrow(df.use.complete) #Number of observations

#Treated and Control Units
iso2c.treated = unique(df.use.complete$iso2c[df.use.complete$aiib_founder_2016 == 1])
iso2c.all = unique(df.use.complete$iso2c)

length(iso2c.treated) #Treated Units
length(iso2c.all) - length(iso2c.treated) #Control Units

###################################
#Produce Figure C.1
###################################
#-------------
#(1) Prepare results
#-------------
#Summarize
result.use = effSummary(tab.c2.2$soft_project_count_all)

#-------------
#(2) Parameters
#-------------
#Titles
xtitle = ""
ytext = "Effect on New World Bank Non-Infrastructure Projects"

#Limits
ylim = c(-3, 3)
newx = 1992:2019
t.time = 2016

#Cex
cex = 2

#-------------
#(3) Plot
#-------------
#Setup
png(filename = "figure/Figure_C1.png",
    height = 800,
    width = 1200)
par(mar = c(2, 5, 5, 1),
    lend = 1)

#----------
#Plot empty graph
#----------
plot(1,
     type = "n",
     xlab = "",
     ylab = "",
     axes = F,
     xlim = range(newx),
     ylim = ylim)
box()

title(main = "World Bank Non-Infrastructure Projects",
      line = 1,
      cex.main = cex)

#-----------------------
#Add axis
#-----------------------
#X-axis labels
axis(side = 1, 
     at = seq(min(newx),
              max(newx),
              by = 2),
     cex.axis = cex)
#X-axis title
mtext(text = xtitle, 
      side = 1, 
      line = 2.5,
      cex = cex)
#Y-axis
axis(side = 2,
     at = seq(min(ylim),
              max(ylim),
              by = 1),
     cex.axis = cex)
#Y-axis title
mtext(text = ytext,
      side = 2,
      line = 2.5, 
      cex = cex)

#-----------------------
#Lines and shades
#-----------------------
#Ablines
abline(v = t.time,
       col = "gray",
       lty = 2,
       lwd = cex)
abline(h = 0,
       col = "gray20",
       lty = 2,
       lwd = cex)

#ATTs
lines(x = newx,
      y = result.use$est.eff$estimated_ATT,
      col = 1,
      lty = 1,
      lwd = cex)
#CI
polygon(x = c(rev(newx), 
              newx),
        y = c(rev(result.use$est.eff$estimated_ATT_ci_l), 
              result.use$est.eff$estimated_ATT_ci_u),
        col = "#55555530",
        border = NA)
#Legend
legend("topleft",
       legend = c("Estimated ATT",
                  "95% Credible Intervals"),
       cex = cex,
       seg.len = 2,
       col = c(1, "#55555530"),
       lty = c(1, 5),
       lwd = c(2, 20),
       bty = "n")

#Close
dev.off()